# Silence all warnings for the rest of the session (warn = -1).
options(warn = -1)
library(tidyr)
library(dplyr)##
## Attaching package: 'dplyr'
## The following objects are masked from 'package:stats':
##
## filter, lag
## The following objects are masked from 'package:base':
##
## intersect, setdiff, setequal, union
library(plotly)## Loading required package: ggplot2
##
## Attaching package: 'plotly'
## The following object is masked from 'package:ggplot2':
##
## last_plot
## The following object is masked from 'package:stats':
##
## filter
## The following object is masked from 'package:graphics':
##
## layout
library(rworldmap)## Loading required package: sp
## ### Welcome to rworldmap ###
## For a short introduction type : vignette('rworldmap')
library(maps)
library(ggmap)##
## Attaching package: 'ggmap'
## The following object is masked from 'package:plotly':
##
## wind
library(reshape2)##
## Attaching package: 'reshape2'
## The following object is masked from 'package:tidyr':
##
## smiths
library(raster)##
## Attaching package: 'raster'
## The following object is masked from 'package:plotly':
##
## select
## The following object is masked from 'package:dplyr':
##
## select
## The following object is masked from 'package:tidyr':
##
## extract
library(rgdal)## rgdal: version: 1.2-8, (SVN revision 663)
## Geospatial Data Abstraction Library extensions to R successfully loaded
## Loaded GDAL runtime: GDAL 2.1.3, released 2017/20/01
## Path to GDAL shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/gdal
## Loaded PROJ.4 runtime: Rel. 4.9.3, 15 August 2016, [PJ_VERSION: 493]
## Path to PROJ.4 shared files: /Library/Frameworks/R.framework/Versions/3.4/Resources/library/rgdal/proj
## Linking to sp version: 1.2-4
library(rgeos)## rgeos version: 0.3-23, (SVN revision 546)
## GEOS runtime version: 3.6.1-CAPI-1.10.1 r0
## Linking to sp version: 1.2-4
## Polygon checking: TRUE
# Load the raw CSV export. check.names = FALSE keeps the header names exactly
# as they appear in the file; text columns stay character rather than factor.
terr <- read.csv(
  "~/Downloads/globalterrorismdb_0617dist.csv",
  check.names = FALSE,
  header = TRUE,
  stringsAsFactors = FALSE
)

# Give the columns used below shorter names (new_name = old_name).
terr <- rename(
  terr,
  id = eventid, year = iyear, nation = country_txt,
  Region = region_txt, attack = attacktype1_txt,
  target = targtype1_txt, weapon = weaptype1_txt,
  Killed = nkill, wounded = nwound
)

# We clean the data
# Coerce the casualty counts to integer and treat missing values as zero so
# that the per-incident and per-year sums below are never NA.
terr$Killed <- as.integer(terr$Killed)
terr$wounded <- as.integer(terr$wounded)
terr$Killed[is.na(terr$Killed)] <- 0
terr$wounded[is.na(terr$wounded)] <- 0
terr$casualties <- as.integer(terr$Killed + terr$wounded)

# Recode a handful of country names.
terr$nation[terr$nation == "United States"] <- "USA"
terr$nation[terr$nation == "United Kingdom"] <- "UK"
terr$nation[terr$nation == "People's Republic of the Congo"] <- "Republic of Congo"
terr$nation[terr$nation == "Bosnia-Herzegovina"] <- "Bosnia and Herzegovina"
terr$nation[terr$nation == "Slovak Republic"] <- "Slovakia"

# Number of attacks per (year, nation, Region).
global_t <- terr %>%
  group_by(year, nation, Region) %>%
  summarize(Total = n())

# Number of attacks per year, across all nations.
global_y <- global_t %>%
  group_by(year) %>%
  summarize(Total = sum(Total))

# Number of attacks per nation over all years, most-attacked first.
global_attacks <- global_t %>%
  group_by(nation) %>%
  summarize(Total = sum(Total)) %>%
  arrange(desc(Total))

# Same table ordered by descending Total. The column is indexed explicitly
# instead of attach()-ing the data frame, which would put its columns on the
# search path and risk masking other objects.
global_n <- global_attacks[order(-global_attacks$Total), ]

# Let’s look at the number of terrorist attacks with the passage of time.
# Yearly attack totals drawn as a red line, then converted to an interactive
# plotly widget of 800 x 480 pixels.
gy <- ggplot(global_y, aes(x = year, y = Total)) +
  geom_line(color = "red") +
  labs(
    x = "Year",
    y = "Number of attacks",
    title = "Number of global attacks over years"
  ) +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 45, vjust = 1)
  )
ggplotly(gy, width = 800, height = 480)
# People killed per year.
global_kills_years <- terr %>%
  group_by(year) %>%
  summarize(killed = sum(Killed))

# People wounded per year.
global_wound_years <- terr %>%
  group_by(year) %>%
  summarize(wounded = sum(wounded))

# One row per year with killed, wounded and the attack count (Total).
# Both joins name their key explicitly so they can never silently pick up
# another shared column.
globe <- global_kills_years %>%
  inner_join(global_wound_years, by = "year") %>%
  inner_join(global_y, by = "year")

# Reshape to long form: one row per (year, measure) with the number in `value`,
# then call the measure column `effect` for the plot legend.
df <- melt(globe, id.vars = "year")
df <- rename(df, effect = variable)

# One line per measure (killed / wounded / Total) over time.
gky <- df %>%
  ggplot(mapping = aes(x = year, y = value, color = effect)) +
  geom_line() +
  theme(
    panel.background = NULL,
    axis.text.x = element_text(angle = 45, vjust = 1)
  ) +
  labs(
    x = "Year",
    y = "Count",
    title = "Number of people killed/wounded over years against attacks"
  )
ggplotly(gky, width = 800, height = 450)
A high peak in the number of people killed can be seen in 1984. In 2001, even though the number of terrorist attacks fell, the number of casualties peaked. The number of casualties rose sharply from 2011 to 2015.
# Casualties per incident (killed + wounded); a missing total counts as zero.
terr$casualties <- as.integer(terr$Killed + terr$wounded)
terr$casualties[is.na(terr$casualties)] <- 0

# Incidents with the ten largest casualty counts (top_n() keeps ties, so more
# than ten rows are possible), drawn as bars keyed by the `target1` column and
# ordered from most to fewest casualties.
g_max_cas <- terr %>%
  top_n(10, casualties) %>%
  ggplot(
    mapping = aes(
      x = reorder(target1, -casualties),
      y = casualties,
      fill = target1
    )
  ) +
  geom_bar(stat = "identity") +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 50, vjust = 1)
  ) +
  labs(
    x = "Target of attack",
    y = "Number of casualties",
    title = "Terrorist attacks with most casualties"
  )
ggplotly(g_max_cas)
# Total casualties attributed to each named group; incidents whose group is
# recorded as 'Unknown' are excluded. The casualties are summed (rather than
# counting incidents) so that the plotted value matches the axis label and
# the title.
gname_max_cas <- terr[c("gname", "casualties")] %>%
  filter(gname != "Unknown") %>%
  group_by(gname) %>%
  summarize(Total = sum(casualties))

# The 40 groups with the highest totals, ordered from highest to lowest.
g <- gname_max_cas %>%
  top_n(40, Total) %>%
  ggplot(mapping = aes(x = reorder(gname, -Total), y = Total, fill = gname)) +
  geom_bar(stat = "identity") +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 50, vjust = 1)
  ) +
  labs(
    x = "Terrorist group",
    y = "Number of casualties",
    title = "Terrorist groups with most casualties"
  )
ggplotly(g, width = 800, height = 450)
Let’s look at the 40 countries with the highest number of terrorist attacks, and the 40 countries with the lowest number of terrorist attacks.
# The 40 nations with the most attacks. The ranking column is passed to
# top_n() explicitly instead of relying on its "last column" default, so the
# selection does not depend on column order.
g2 <- global_n %>%
  top_n(40, Total) %>%
  ggplot(mapping = aes(x = reorder(nation, -Total), y = Total, fill = nation)) +
  geom_bar(stat = "identity") +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 90, vjust = 1)
  ) +
  labs(
    x = "Countries",
    y = "Number of attacks",
    title = "Countries with most number of terrorist attacks"
  )
ggplotly(g2, width = 800, height = 450)
# The 40 nations with the fewest attacks (a negative n selects from the bottom;
# ties are kept, so more than 40 rows are possible). The ranking column is
# passed to top_n() explicitly instead of relying on its "last column" default.
g2 <- global_n %>%
  top_n(-40, Total) %>%
  ggplot(mapping = aes(x = reorder(nation, Total), y = Total, fill = nation)) +
  geom_bar(stat = "identity") +
  theme(
    legend.position = "none",
    panel.background = NULL,
    axis.text.x = element_text(angle = 90, vjust = 1)
  ) +
  labs(
    x = "Countries",
    y = "Number of attacks",
    title = "Countries with least number of terrorist attacks"
  )
ggplotly(g2, width = 800, height = 450)
We’ll take a look at the relationships between some of the parameters. These relationships, however, do not directly imply causation; further analysis would be needed to establish causation.
# Casualties per incident by region. The y-axis is zoomed to the whisker range
# of the overall casualty distribution (scaled by 1.05) so the boxes stay
# readable despite extreme outliers.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g1 <- ggplot(terr, aes(x = Region, y = casualties, fill = Region)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45), legend.position = "none")
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)
ggplotly(g2)
We can see that Middle East & North Africa has a higher median number of casualties (2) than most other regions, the same as that of Sub-Saharan Africa. The region with the least variation in the number of casualties is North America. However, it has many outliers, with the 9/11 attacks resulting in the highest number of casualties (8749).
# Casualties per incident by attack type. The y-axis is zoomed to the whisker
# range of the overall casualty distribution (scaled by 1.05) so the boxes
# stay readable despite extreme outliers.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g1 <- ggplot(terr, aes(x = attack, y = casualties, fill = attack)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45), legend.position = "none")
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)
ggplotly(g2, height = 500)
Leaving out the unknown attack types, the number of casualties varies most in the case of bombings/explosions. Hijackings and hostage takings have low variance in the number of casualties, with outliers as high as 8749 in the case of hijacking.
# Casualties per incident by weapon type. The y-axis is zoomed to the whisker
# range of the overall casualty distribution (scaled by 1.05) so the boxes
# stay readable despite extreme outliers.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g1 <- ggplot(terr, aes(x = weapon, y = casualties, fill = weapon)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45), legend.position = "none")
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)
ggplotly(g2, height = 500)
The number of casualties caused by chemical weapons has been highly variable, with 25% of chemical attacks resulting in between 50 and 5513 casualties. There haven’t been any casualties caused by radiological weapons.
# Casualties per incident by target type. The y-axis is zoomed to the whisker
# range of the overall casualty distribution (scaled by 1.05) so the boxes
# stay readable despite extreme outliers.
ylim1 <- boxplot.stats(terr$casualties)$stats[c(1, 5)]
g1 <- ggplot(terr, aes(x = target, y = casualties, fill = target)) +
  geom_boxplot() +
  theme(axis.text.x = element_text(angle = 45), legend.position = "none")
g2 <- g1 + coord_cartesian(ylim = ylim1 * 1.05)
ggplotly(g2, height = 500)
Casualties related to transportation, military, and private citizens & property targets have high variance. Attacks on police and non-state militia generally lead to more than 1 casualty, with as many as 11 casualties in some cases.